# Access data using rvest ----
# Download the page and extract the survey results table.
url <- "https://www.pori.hk/nationalissues/taiwan-independence"

# The XPath is tied to one specific block id on the page, so it can stop
# matching if the page is re-generated or its layout changes.
# `df` is the list returned by html_table(): one element per matched <table>.
df <- url %>%
  read_html() %>%
  html_nodes(
    xpath = '//*[@id="block-yui_3_17_2_1_1563182649066_3794"]/div/div[2]/table'
  ) %>%
  html_table()

# Fail with an actionable message rather than "subscript out of bounds"
# when the XPath matched nothing.
if (length(df) == 0L) {
  stop(
    "No table matched the XPath on ", url,
    "; the page layout may have changed.",
    call. = FALSE
  )
}

# Only the first matched table is used.
PORI_data <- df[[1]]
str(PORI_data)
## 'data.frame': 109 obs. of 8 variables:
## $ X1: chr "調查日期\n \n Date of survey" "2-8/7/2019" "7-11/1/2019" "6-9/8/2018" ...
## $ X2: chr "樣本人數\n \n Total Sample" "1025" "1007" "1000" ...
## $ X3: chr "次樣本人數\n \n Sub-sample" "624" "526" "553" ...
## $ X4: chr "贊成\n \n Support" "43.7%" "34.5%" "34.4%" ...
## $ X5: chr "反對\n \n Oppose" "44.1%" "50.5%" "54.1%" ...
## $ X6: chr "難講/唔知道\n \n DK/HS" "12.3%" "15.0%" "11.5%" ...
## $ X7: chr "合計\n \n Total" "100.0%" "100.0%" "100.0%" ...
## $ X8: chr "淨值\n \n Net value" "-0.4%" "-15.9%" "-19.7%" ...
# Replace the default X1..X8 column names with short labels, in table order.
names(PORI_data) <- c(
  "Date", "Total Sample", "Sub-sample", "Support",
  "Oppose", "DKnow", "Total", "Net value"
)
# Clean ----
# Build the analysis table from the raw scrape:
#   * drop the first row, which holds the table's header text;
#   * reduce each survey date to the part after the last hyphen
#     (e.g. "2-8/7/2019" -> "8/7/2019") and parse it as day/month/year;
#   * strip the "%" sign from the three response columns and make them numeric.
# The remaining columns are left as character, as scraped.
clean <- PORI_data %>%
  slice(-1) %>%
  mutate(
    # strptime() accepts single-digit day and month on input, so no manual
    # zero-padding of the month is needed. The result is a POSIXct at midnight
    # in the session's time zone.
    Date = as.POSIXct(strptime(sub(".*-", "", Date), "%d/%m/%Y")),
    Support = as.numeric(sub("%", "", Support, fixed = TRUE)),
    Oppose = as.numeric(sub("%", "", Oppose, fixed = TRUE)),
    DKnow = as.numeric(sub("%", "", DKnow, fixed = TRUE))
  )
#clean <- clean%>%drop_na()
# Plot ----
#clean$Date <- factor(clean$Date, levels = clean[["Date"]])
#clean$Date <- as.Date(clean$Date)
# Line chart of the three response shares over time, one trace per answer.
# `clean` is passed as the data argument and every trace refers to its columns
# by formula, so all traces share the same x values.
p <- plot_ly(
  clean,
  x = ~Date,
  y = ~Support,
  type = "scatter",
  mode = "lines",
  name = "支持",
  line = list(color = "#FF7F0E")
) %>%
  add_trace(
    y = ~Oppose,
    name = "反對",
    line = list(color = "rgb(22, 96, 167)")
  ) %>%
  # NOTE(review): the source column header for DKnow is "難講/唔知道" (DK/HS),
  # while the legend label below reads "無所謂" - confirm the intended wording.
  add_trace(
    y = ~DKnow,
    name = "無所謂",
    line = list(color = "#2CA02C"),
    opacity = 0.39
  ) %>%
  layout(
    title = list(
      x = 0.5,
      # Font family was misspelled "Ariel", which is not a font name.
      font = list(size = 24, family = "Arial"),
      text = "Hong Kong people support Taiwan independence"
    ),
    xaxis = list(title = "Years"),
    yaxis = list(title = "%"),
    plot_bgcolor = "rgb(247, 247, 247)",
    paper_bgcolor = "rgb(247, 247, 247)",
    legend = list(x = 1.02, y = 1.02),
    margin = list(t = 102)
  )
# Disabled save step: would write `clean` and `p` to hktw.rda.
#save(clean,p,file = "hktw.rda")
# Restore the objects stored in hktw.rda into the current workspace.
# NOTE(review): if the file holds `clean` and `p` (as the disabled save() above
# suggests), this replaces the objects built earlier in this script, so the
# plot shown below is the cached one - confirm that this is intended.
load(file = "hktw.rda")
# Display the plot.
p